**************************
***	RUN USING STATA 16 ***
**************************

* Purpose: calculate average English exam difficulty by year using classical test theory, output table.
* Last Updated: 06 May 2021
	
***********************************
*** English (regression setup)  ***
***********************************

	* Load the English item-response data.
	* Forward slashes are used in paths because Stata accepts them on every
	* platform, whereas backslash separators only work on Windows.
	use "$output/eng_unique.dta", clear

	* Prefix the wide item columns with a common stub so reshape can collect them,
	* and rename the test version to booklet.
	ren o_* item_o_*
	ren version booklet

	* Keep a single row per pupil id. With the force option, rows that share a pid
	* but differ on other variables are dropped as well.
	duplicates drop pid, force

	* Wide to long: one row per pupil-item. The string j variable holds the part
	* of each column name that follows the stub "item", i.e. "_o_...".
	reshape long item, i(pid booklet) j(j) string
	ren item response
	ren j it

	* Character 4 of the suffix is the first character after "_o_"; it is used
	* here as the year identifier (presumably a single-digit year index - confirm).
	* Prepending it to the item name makes the alphabetical order used by encode
	* sort items by year first.
	gen yr = substr(it, 4, 1)
	egen order = concat(yr it)

	* Convert the string identifiers to labelled numeric variables.
	encode order, gen(item)
	encode yr, gen(year)
	drop it yr order

	* Overwrite the value label created by encode with calendar years.
	* NOTE(review): encode numbers the distinct strings 1, 2, ... in alphabetical
	* order, so this relabelling assumes all nine year codes are present in the
	* data; if one is missing the later labels would be shifted - verify.
	lab define year 1 "2011" 2 "2012" 3 "2013" 4 "2014" 5 "2015" 6 "2016" 7 "2017" 8 "2018" 9 "2019", modify

	* Regress item responses on year dummies. Level 5 (labelled 2015) is the
	* omitted reference year because it has the lowest pass rate in the WAEC data.
	* The hasbook and haspupil locals feed the fixed-effects indicator rows of
	* the exported table.
	* NOTE(review): standard errors are not clustered although each pupil
	* contributes several item-level rows - confirm this is intended.
	eststo clear

	* (1) No fixed effects.
	eststo: reg response ib5.year
	estadd local hasbook "No"
	estadd local haspupil "No"

	* (2) Booklet fixed effects.
	eststo: reghdfe response ib5.year, absorb(booklet)
	estadd local hasbook "Yes"
	estadd local haspupil "No"

	* (3) Booklet and pupil fixed effects.
	eststo: reghdfe response ib5.year, absorb(booklet pid)
	estadd local hasbook "Yes"
	estadd local haspupil "Yes"

	* Export the three stored models as a booktabs LaTeX table.
	esttab using "$table/eng_tab1.tex", b(3) se(3) label ///
		title("Item-level responses on year dummies") replace ///
		nogaps nomtitles booktabs star(* 0.10 ** 0.05 *** 0.01) ///
		scalars("hasbook Booklet FE" "haspupil Pupil FE")
	

	
	
	
	
	
	
	
	
	
	
	
	
	
	
	
	